

* ---------------------------------------------------------------------------
* Build an employee-by-year publication panel and save it as "publication".
* Starts from the medline/umetrics crosswalk, attaches article-level
* attributes by pmid, builds 0/1 article indicators, then collapses to one
* row per (emp_number, pubyear).
* ---------------------------------------------------------------------------
use "$pub\medline_umetrics_xwalk", clear

* keep(1 3) keeps every crosswalk row whether or not it matches, so any
* attribute brought in below can be missing for unmatched pmids.
merge m:1 pmid using "$pubmed\pmid_citations", keep(1 3) nogen

merge m:1 pmid using "$pubmed\pmid_journalid", keep(1 3) nogen

merge m:1 pmid using "$pubmed\basicness", keep(1 3) nogen

merge m:1 pmid using "$pubmednew/pubmed2020_pmid_unique_ui3", keep(1 3) nogen

* One row = one publication; summing npub in the collapse gives the count.
gen npub=1

* Benchmark citation levels within (pub_year, ui3) cells.
* NOTE: despite the mean_ prefix these hold the cell MEDIAN.
* NOTE(review): the grouping uses pub_year while the collapse below uses
* pubyear - presumably two distinct variables from different inputs; confirm.
bysort pub_year ui3: egen mean_cite_3yr=median(fcites_3yr)
bysort pub_year ui3: egen mean_cite_lifetime=median(fcites_lifetime)

* High-cite flags: at or above the cell median. The "< ." guard is required
* because Stata treats missing as larger than any number.
gen npub_highcite1 = npub*(fcites_3yr>=mean_cite_3yr & fcites_3yr<.)
gen npub_highcite2 = npub*(fcites_lifetime>=mean_cite_lifetime & fcites_lifetime<.)

* FIX: guard against missing citation_patent. Without "< ." a missing value
* satisfies ">= 1" and the article would be counted as patent-cited.
gen npub_patent=(citation_patent>=1 & citation_patent<.)
gen npub_highjif=(jif>=2 & jif<.)
gen npub_vhighjif=(jif>=6.79 & jif<.)
gen npub_highjcif=(jcif>=0.01 & jcif<.)
* Applied is strictly above 0.04 and basic strictly below, so an article with
* level_score exactly 0.04 (or missing) is counted in neither group.
gen npub_applied=(level_score>0.04 & level_score<.)
gen npub_basic=(level_score<0.04)
* Copy of fcites_3yr so the collapse can produce both its sum (fcites_3yr)
* and its mean (citation_3yr).
gen citation_3yr=fcites_3yr

* Sums: every npub- and fcites-prefixed variable. Means: the four listed
* variables. The mean_cite_ helpers are not listed and are dropped here.
collapse (sum) npub* fcites* (mean) citation_3yr jif citation_patent level_score, by (emp_number pubyear)
ren pubyear year
compress

save "$pub\publication", replace


* ---------------------------------------------------------------------------
* Attach the employee-year publication measures to the regression sample and
* overwrite the regression sample on disk.
* NOTE(review): the saved file carries iris_employee_number instead of iris
* (see the renames below), so running this section again on its own output
* would stop at "ren iris"; confirm an upstream step recreates the input.
* ---------------------------------------------------------------------------
use "$data/regression_sample", clear

* Remove any publication variables left from an earlier run so the merge
* brings in fresh values (merge without update keeps master values).
* FIX: drop each pattern separately. A single "cap drop a b c" drops nothing
* at all if even one pattern has no match, silently leaving stale columns.
foreach pat in npub* fcites* citation_3yr jif citation_patent level_score {
    cap drop `pat'
}

* Map the sample's employee id (2019 numbering) to the 2020 numbering used
* as emp_number in the publication panel.
ren iris emp_num_2019
merge m:1 emp_num_2019 using "$rawdata\emp_num_xwalk_2020", keep(1 3) nogen
ren emp_num_2020 emp_number
merge m:1 emp_number year using "$pub\publication", keep(1 3) nogen

* Count and sum variables: an unmatched employee-year means zero publications.
foreach var in npub npub_highcite1 npub_highcite2 npub_patent fcites_3yr fcites_5yr fcites_10yr fcites_lifetime npub_highjcif npub_highjif npub_vhighjif npub_applied npub_basic {
    replace `var'=0 if `var'==.
}
* Per-article averages are undefined when there are no publications.
foreach var in citation_3yr jif citation_patent level_score {
    replace `var'=. if npub==0
}
drop emp_number
ren emp_num_2019 iris_employee_number
compress
save "$data/regression_sample", replace
